In [3]:
import pandas as pd
In [4]:
import numpy as np
In [5]:
import sys
sys.path.append('..')
In [6]:
from dis_ds import parsing
In [7]:
all_files = !ls ../test_data
In [8]:
full_path_all_files = ['../test_data/' + a for a in all_files]
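The `!ls` shell escape above only works inside IPython. A minimal pure-Python alternative using the standard library, which should produce the same sorted list of paths (assuming the same '../test_data' directory):
import glob

# Build the full path to every file in the test data directory.
full_path_all_files = sorted(glob.glob('../test_data/*'))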
In [9]:
all_files_df = parsing.parse_file_list(full_path_all_files)
In [14]:
all_files_df[:1000]
Out[14]:
                     bakerloo  central  circle  district  hammersmith-city  jubilee  metropolitan  northern  piccadilly  victoria  waterloo-city
2015-02-24 11:51:45        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 11:52:44        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 11:53:44        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 11:54:45        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 11:55:44        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 11:56:45        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:01:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:02:15        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:03:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:04:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:05:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:06:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:07:15        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:08:15        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:09:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:10:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:11:15        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:12:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:13:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:14:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:15:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:16:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:17:15        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:18:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:19:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:20:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:21:15        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:22:15        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:23:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:24:14        10       10      10        10                10       10            10        10          10        10             10
...                        ...      ...     ...       ...               ...      ...           ...       ...         ...       ...            ...
2015-02-25 04:05:15        10       10      10        10                20       20            20        20          10        20             20
2015-02-25 04:06:15        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:07:15        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:08:15        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:09:15        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:10:15        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:11:14        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:12:14        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:13:14        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:14:15        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:15:15        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:16:14        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:17:14        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:18:15        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:19:15        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:20:15        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:21:15        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:22:15        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:23:14        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:24:15        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:25:14        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:26:14        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:27:15        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:28:15        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:29:14        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:30:14        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:31:14        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:32:14        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:33:14        10       10      10        10                10       10            10        10          10        10             20
2015-02-25 04:34:14        10       10      10        10                10       10            10        10          10        10             20
1000 rows × 11 columns
In [17]:
import xlsxwriter
writer = pd.ExcelWriter('tfldata.xlsx', engine='xlsxwriter')
all_files_df.to_excel(writer, sheet_name="Sheet 1")
writer.save()  # flush the workbook to disk; without this the file is never written
In [11]:
all_files_df.save(all_files_df)
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/core/generic.py:1000: FutureWarning: save is deprecated, use to_pickle
warnings.warn("save is deprecated, use to_pickle", FutureWarning)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-11-33722759911e> in <module>()
----> 1 all_files_df.save(all_files_df)
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/core/generic.py in save(self, path)
999 from pandas.io.pickle import to_pickle
1000 warnings.warn("save is deprecated, use to_pickle", FutureWarning)
-> 1001 return to_pickle(self, path)
1002
1003 def load(self, path): # TODO remove in 0.14
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/io/pickle.py in to_pickle(obj, path)
11 File path
12 """
---> 13 with open(path, 'wb') as f:
14 pkl.dump(obj, f, protocol=pkl.HIGHEST_PROTOCOL)
15
TypeError: invalid file: bakerloo central circle district hammersmith-city \
2015-02-24 11:51:45 10 10 10 10 10
2015-02-24 11:52:44 10 10 10 10 10
2015-02-24 11:53:44 10 10 10 10 10
2015-02-24 11:54:45 10 10 10 10 10
2015-02-24 11:55:44 10 10 10 10 10
2015-02-24 11:56:45 10 10 10 10 10
2015-02-24 12:01:14 10 10 10 10 10
2015-02-24 12:02:15 10 10 10 10 10
2015-02-24 12:03:14 10 10 10 10 10
2015-02-24 12:04:14 10 10 10 10 10
2015-02-24 12:05:14 10 10 10 10 10
2015-02-24 12:06:14 10 10 10 10 10
2015-02-24 12:07:15 10 10 10 10 10
2015-02-24 12:08:15 10 10 10 10 10
2015-02-24 12:09:14 10 10 10 10 10
2015-02-24 12:10:14 10 10 10 10 10
2015-02-24 12:11:15 10 10 10 10 10
2015-02-24 12:12:14 10 10 10 10 10
2015-02-24 12:13:14 10 10 10 10 10
2015-02-24 12:14:14 10 10 10 10 10
2015-02-24 12:15:14 10 10 10 10 10
2015-02-24 12:16:14 10 10 10 10 10
2015-02-24 12:17:15 10 10 10 10 10
2015-02-24 12:18:14 10 10 10 10 10
2015-02-24 12:19:14 10 10 10 10 10
2015-02-24 12:20:14 10 10 10 10 10
2015-02-24 12:21:15 10 10 10 10 10
2015-02-24 12:22:15 10 10 10 10 10
2015-02-24 12:23:14 10 10 10 10 10
2015-02-24 12:24:14 10 10 10 10 10
... ... ... ... ... ...
2015-03-03 09:29:13 10 10 10 10 10
2015-03-03 09:30:14 10 10 10 10 10
2015-03-03 09:31:14 10 10 10 10 10
2015-03-03 09:32:13 10 10 10 10 10
2015-03-03 09:33:13 10 10 10 10 10
2015-03-03 09:34:14 10 10 10 10 10
2015-03-03 09:35:14 10 10 10 10 10
2015-03-03 09:36:13 10 10 10 10 10
2015-03-03 09:37:14 10 10 10 10 10
2015-03-03 09:38:14 10 10 10 10 10
2015-03-03 09:39:14 10 10 10 10 10
2015-03-03 09:40:14 10 10 10 10 10
2015-03-03 09:41:14 10 10 10 10 10
2015-03-03 09:42:13 10 10 10 10 10
2015-03-03 09:43:14 10 10 10 10 10
2015-03-03 09:44:14 10 10 10 10 10
2015-03-03 09:45:15 10 10 10 10 10
2015-03-03 09:46:15 10 10 10 10 10
2015-03-03 09:47:14 10 10 10 10 10
2015-03-03 09:48:14 10 10 10 10 10
2015-03-03 09:49:13 10 10 10 10 10
2015-03-03 09:50:14 10 10 10 10 10
2015-03-03 09:51:14 10 10 10 10 10
2015-03-03 09:52:13 10 10 10 10 10
2015-03-03 09:53:14 10 10 10 10 10
2015-03-03 09:54:13 10 10 10 10 10
2015-03-03 09:55:14 10 10 10 10 10
2015-03-03 09:56:14 10 10 10 10 10
2015-03-03 09:57:14 10 10 10 10 10
2015-03-03 09:58:13 10 10 10 10 10
jubilee metropolitan northern piccadilly victoria \
2015-02-24 11:51:45 10 10 10 10 10
2015-02-24 11:52:44 10 10 10 10 10
2015-02-24 11:53:44 10 10 10 10 10
2015-02-24 11:54:45 10 10 10 10 10
2015-02-24 11:55:44 10 10 10 10 10
2015-02-24 11:56:45 10 10 10 10 10
2015-02-24 12:01:14 10 10 10 10 10
2015-02-24 12:02:15 10 10 10 10 10
2015-02-24 12:03:14 10 10 10 10 10
2015-02-24 12:04:14 10 10 10 10 10
2015-02-24 12:05:14 10 10 10 10 10
2015-02-24 12:06:14 10 10 10 10 10
2015-02-24 12:07:15 10 10 10 10 10
2015-02-24 12:08:15 10 10 10 10 10
2015-02-24 12:09:14 10 10 10 10 10
2015-02-24 12:10:14 10 10 10 10 10
2015-02-24 12:11:15 10 10 10 10 10
2015-02-24 12:12:14 10 10 10 10 10
2015-02-24 12:13:14 10 10 10 10 10
2015-02-24 12:14:14 10 10 10 10 10
2015-02-24 12:15:14 10 10 10 10 10
2015-02-24 12:16:14 10 10 10 10 10
2015-02-24 12:17:15 10 10 10 10 10
2015-02-24 12:18:14 10 10 10 10 10
2015-02-24 12:19:14 10 10 10 10 10
2015-02-24 12:20:14 10 10 10 10 10
2015-02-24 12:21:15 10 10 10 10 10
2015-02-24 12:22:15 10 10 10 10 10
2015-02-24 12:23:14 10 10 10 10 10
2015-02-24 12:24:14 10 10 10 10 10
... ... ... ... ... ...
2015-03-03 09:29:13 10 10 10 10 10
2015-03-03 09:30:14 10 10 10 10 10
2015-03-03 09:31:14 10 10 10 10 10
2015-03-03 09:32:13 10 10 10 10 10
2015-03-03 09:33:13 10 10 10 10 10
2015-03-03 09:34:14 10 10 10 10 10
2015-03-03 09:35:14 10 10 10 10 10
2015-03-03 09:36:13 10 10 10 10 10
2015-03-03 09:37:14 10 10 10 10 10
2015-03-03 09:38:14 10 10 10 10 10
2015-03-03 09:39:14 10 10 10 10 10
2015-03-03 09:40:14 10 10 10 10 10
2015-03-03 09:41:14 10 10 10 10 10
2015-03-03 09:42:13 10 10 10 10 10
2015-03-03 09:43:14 10 10 10 10 10
2015-03-03 09:44:14 10 10 10 10 10
2015-03-03 09:45:15 10 10 10 10 10
2015-03-03 09:46:15 10 10 10 10 10
2015-03-03 09:47:14 10 10 10 10 10
2015-03-03 09:48:14 10 10 10 10 10
2015-03-03 09:49:13 10 10 10 10 10
2015-03-03 09:50:14 10 10 10 10 10
2015-03-03 09:51:14 10 10 10 10 10
2015-03-03 09:52:13 10 10 10 10 10
2015-03-03 09:53:14 10 10 10 10 10
2015-03-03 09:54:13 10 10 10 10 10
2015-03-03 09:55:14 10 10 10 10 10
2015-03-03 09:56:14 10 10 10 10 10
2015-03-03 09:57:14 10 10 10 10 10
2015-03-03 09:58:13 10 10 10 10 10
waterloo-city
2015-02-24 11:51:45 10
2015-02-24 11:52:44 10
2015-02-24 11:53:44 10
2015-02-24 11:54:45 10
2015-02-24 11:55:44 10
2015-02-24 11:56:45 10
2015-02-24 12:01:14 10
2015-02-24 12:02:15 10
2015-02-24 12:03:14 10
2015-02-24 12:04:14 10
2015-02-24 12:05:14 10
2015-02-24 12:06:14 10
2015-02-24 12:07:15 10
2015-02-24 12:08:15 10
2015-02-24 12:09:14 10
2015-02-24 12:10:14 10
2015-02-24 12:11:15 10
2015-02-24 12:12:14 10
2015-02-24 12:13:14 10
2015-02-24 12:14:14 10
2015-02-24 12:15:14 10
2015-02-24 12:16:14 10
2015-02-24 12:17:15 10
2015-02-24 12:18:14 10
2015-02-24 12:19:14 10
2015-02-24 12:20:14 10
2015-02-24 12:21:15 10
2015-02-24 12:22:15 10
2015-02-24 12:23:14 10
2015-02-24 12:24:14 10
... ...
2015-03-03 09:29:13 10
2015-03-03 09:30:14 10
2015-03-03 09:31:14 10
2015-03-03 09:32:13 10
2015-03-03 09:33:13 10
2015-03-03 09:34:14 10
2015-03-03 09:35:14 10
2015-03-03 09:36:13 10
2015-03-03 09:37:14 10
2015-03-03 09:38:14 10
2015-03-03 09:39:14 10
2015-03-03 09:40:14 10
2015-03-03 09:41:14 10
2015-03-03 09:42:13 10
2015-03-03 09:43:14 10
2015-03-03 09:44:14 10
2015-03-03 09:45:15 10
2015-03-03 09:46:15 10
2015-03-03 09:47:14 10
2015-03-03 09:48:14 10
2015-03-03 09:49:13 10
2015-03-03 09:50:14 10
2015-03-03 09:51:14 10
2015-03-03 09:52:13 10
2015-03-03 09:53:14 10
2015-03-03 09:54:13 10
2015-03-03 09:55:14 10
2015-03-03 09:56:14 10
2015-03-03 09:57:14 10
2015-03-03 09:58:13 10
[9944 rows x 11 columns]
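The traceback shows two problems: `save` is deprecated in favour of `to_pickle`, and it expects a file path rather than the DataFrame itself. A minimal corrected sketch (the 'all_files_df.pkl' filename is illustrative):
# Pickle the parsed DataFrame to disk; the path is an assumption.
all_files_df.to_pickle('all_files_df.pkl')

# Reload it later with:
# all_files_df = pd.read_pickle('all_files_df.pkl')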
In [13]:
%save?
In [16]:
from sqlalchemy import create_engine
In [17]:
engine = create_engine('postgres://pmgigyko:Mb7sR3WMZSNPYjm4FTvS0WRDhtqUgcam@pellefant.db.elephantsql.com:5432/pmgigyko')
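Hard-coding database credentials in a notebook is easy to leak when the notebook is shared. One possible alternative, assuming a DATABASE_URL environment variable holds the same connection string:
import os
from sqlalchemy import create_engine

# Read the connection string from the environment instead of embedding it in the notebook.
engine = create_engine(os.environ['DATABASE_URL'])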
In [18]:
all_files_df.to_sql('disruptions_test1',engine)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-18-111b9adef532> in <module>()
----> 1 all_files_df.to_sql('disruptions_test1',engine)
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/core/generic.py in to_sql(self, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype)
980 self, name, con, flavor=flavor, schema=schema, if_exists=if_exists,
981 index=index, index_label=index_label, chunksize=chunksize,
--> 982 dtype=dtype)
983
984 def to_pickle(self, path):
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/io/sql.py in to_sql(frame, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype)
547 pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index,
548 index_label=index_label, schema=schema,
--> 549 chunksize=chunksize, dtype=dtype)
550
551
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/io/sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype)
1185 if_exists=if_exists, index_label=index_label,
1186 schema=schema, dtype=dtype)
-> 1187 table.create()
1188 table.insert(chunksize)
1189 # check for potentially case sensitivity issues (GH7815)
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/io/sql.py in create(self)
648 if self.exists():
649 if self.if_exists == 'fail':
--> 650 raise ValueError("Table '%s' already exists." % self.name)
651 elif self.if_exists == 'replace':
652 self.pd_sql.drop_table(self.name, self.schema)
ValueError: Table 'disruptions_test1' already exists.
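`to_sql` defaults to `if_exists='fail'`, which raises once the table has already been created. A sketch of the two usual alternatives, depending on whether the existing rows should be kept:
# Drop and recreate the table with the current contents of the DataFrame ...
all_files_df.to_sql('disruptions_test1', engine, if_exists='replace')

# ... or append new rows to the existing table instead:
# all_files_df.to_sql('disruptions_test1', engine, if_exists='append')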
In [22]:
df_feb = parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-02')
In [24]:
df_march = parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-03')
In [25]:
df_april = parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-04')
In [31]:
df_may = parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-05')
In [32]:
df_may_full= parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-05').to_string()
---------------------------------------------------------------------------
S3ResponseError Traceback (most recent call last)
<ipython-input-32-f771dfcf1dae> in <module>()
----> 1 df_may_full= parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-05').to_string()
/Users/pivotal/dis/dis_ds/parsing.py in parse_s3_files(file_prefix)
74 b = c.get_bucket('pivotal-london-dis')
75 key_list = b.list(prefix=file_prefix)
---> 76 return parse_file_list(key_list)
77
/Users/pivotal/dis/dis_ds/parsing.py in parse_file_list(file_list)
65
66 def parse_file_list(file_list):
---> 67 result_list = [parse_file(file) for file in file_list]
68 result_df = pd.concat(result_list)
69 return result_df
/Users/pivotal/dis/dis_ds/parsing.py in <listcomp>(.0)
65
66 def parse_file_list(file_list):
---> 67 result_list = [parse_file(file) for file in file_list]
68 result_df = pd.concat(result_list)
69 return result_df
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/boto/s3/bucketlistresultset.py in bucket_lister(bucket, prefix, delimiter, marker, headers, encoding_type)
32 rs = bucket.get_all_keys(prefix=prefix, marker=marker,
33 delimiter=delimiter, headers=headers,
---> 34 encoding_type=encoding_type)
35 for k in rs:
36 yield k
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/boto/s3/bucket.py in get_all_keys(self, headers, **params)
470 return self._get_all([('Contents', self.key_class),
471 ('CommonPrefixes', Prefix)],
--> 472 '', headers, **params)
473
474 def get_all_versions(self, headers=None, **params):
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/boto/s3/bucket.py in _get_all(self, element_map, initial_query_string, headers, **params)
408 else:
409 raise self.connection.provider.storage_response_error(
--> 410 response.status, response.reason, body)
411
412 def validate_kwarg_names(self, kwargs, names):
S3ResponseError: S3ResponseError: 403 Forbidden
<?xml version="1.0" encoding="UTF-8"?>
<Error><Code>RequestTimeTooSkewed</Code><Message>The difference between the request time and the current time is too large.</Message><RequestTime>Sat, 17 Oct 2015 04:02:16 GMT</RequestTime><ServerTime>2015-10-17T06:04:18Z</ServerTime><MaxAllowedSkewMilliseconds>900000</MaxAllowedSkewMilliseconds><RequestId>D656B3EB85DB0AC7</RequestId><HostId>h7a9QifJel2RsnFl4/Whqi/Muc9LqeIfOHj0fotpfE0WnwSNobbCramJkBGl+DvudoFLLA48ZU0=</HostId></Error>
In [27]:
frames = (df_feb, df_march, df_april, df_may)
In [28]:
total_df = pd.concat(frames)
In [29]:
total_df
Out[29]:
                     bakerloo  central  circle  district  hammersmith-city  jubilee  metropolitan  northern  piccadilly  victoria  waterloo-city
2015-02-24 11:51:45        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 11:52:44        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 11:53:44        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 11:54:45        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 11:55:44        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 11:56:45        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:01:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:02:15        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:03:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:04:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:05:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:06:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:07:15        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:08:15        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:09:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:10:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:11:15        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:12:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:13:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:14:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:15:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:16:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:17:15        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:18:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:19:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:20:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:21:15        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:22:15        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:23:14        10       10      10        10                10       10            10        10          10        10             10
2015-02-24 12:24:14        10       10      10        10                10       10            10        10          10        10             10
...                        ...      ...     ...       ...               ...      ...           ...       ...         ...       ...            ...
2015-05-31 15:47:55       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 16:04:35       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 16:21:15       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 16:37:55       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 16:54:36       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 17:11:14       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 17:27:54       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 17:44:35       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 18:01:18       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 18:17:55       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 18:34:35       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 18:51:15       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 19:07:55       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 19:24:35       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 19:41:16       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 19:57:55       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 20:14:45       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 20:31:15       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 20:47:55       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 21:04:35       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 21:21:26       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 21:37:55       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 21:54:35       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 22:11:15       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 22:27:54       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 22:44:35       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 23:01:15       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 23:17:56       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 23:34:36       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
2015-05-31 23:51:15       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
132702 rows × 11 columns
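The tail of the concatenated frame consists of timestamps where every line's status is NaN (files that parsed to no usable data). If those rows are not wanted downstream, a minimal sketch for dropping them (the total_clean name is illustrative):
# Keep only timestamps where at least one tube line reported a status.
total_clean = total_df.dropna(how='all')
total_clean.shape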
In [ ]:
parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-05')
In [ ]:
s3_files_df = parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-')
In [ ]:
s3_files_df
In [19]:
parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-09-24_07:16:27')
Out[19]:
                     bakerloo  central  circle  district  hammersmith-city  jubilee  metropolitan  northern  piccadilly  victoria  waterloo-city
2015-09-24 07:16:27       NaN      NaN     NaN       NaN               NaN      NaN           NaN       NaN         NaN       NaN            NaN
In [28]:
parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-09-12_07:16:23')
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-28-be5970fda21c> in <module>()
----> 1 parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-09-12_07:16:23')
/Users/pivotal/dis/dis_ds/parsing.py in parse_s3_files(file_prefix)
74 b = c.get_bucket('pivotal-london-dis')
75 key_list = b.list(prefix=file_prefix)
---> 76 return parse_file_list(key_list)
77
/Users/pivotal/dis/dis_ds/parsing.py in parse_file_list(file_list)
66 def parse_file_list(file_list):
67 result_list = [parse_file(file) for file in file_list]
---> 68 result_df = pd.concat(result_list)
69 return result_df
70
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/tools/merge.py in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, copy)
752 keys=keys, levels=levels, names=names,
753 verify_integrity=verify_integrity,
--> 754 copy=copy)
755 return op.get_result()
756
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/tools/merge.py in __init__(self, objs, axis, join, join_axes, keys, levels, names, ignore_index, verify_integrity, copy)
797
798 if len(objs) == 0:
--> 799 raise ValueError('All objects passed were None')
800
801 # consolidate data & figure out what our result ndim is going to be
ValueError: All objects passed were None
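Here the prefix matched no parseable files, so `parse_file_list` handed `pd.concat` an empty list, which pandas reports as "All objects passed were None". A sketch of a more defensive version of the helper, assuming `parse_file` returns None (or is never called) when nothing useful is found:
import pandas as pd
from dis_ds import parsing

def parse_file_list(file_list):
    """Parse each file and concatenate the results, tolerating empty input."""
    result_list = [parsing.parse_file(f) for f in file_list]
    result_list = [df for df in result_list if df is not None]
    if not result_list:
        # Nothing matched the prefix: return an empty frame instead of raising.
        return pd.DataFrame()
    return pd.concat(result_list)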
In [46]:
pd.s3_files_df.describe()
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-46-0a7a1fef5d51> in <module>()
----> 1 pd.s3_files_df.describe()
AttributeError: 'module' object has no attribute 's3_files_df'
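The error comes from calling `describe` on the pandas module rather than on the DataFrame; note also that the cell assigning `s3_files_df` above was never executed in this session. A sketch of the intended call, assuming `s3_files_df` has been assigned:
# Summary statistics (count, mean, quartiles, ...) per tube line.
s3_files_df.describe()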
In [ ]:
s3_files_df
In [ ]: